# -*- coding: utf-8 -*-
"""Scrapy settings for the TianYanCha medical-company crawler.

Distributed crawl via scrapy-redis (shared Redis scheduler + dupefilter),
results stored in MongoDB, raw page source saved to ``SOURCE`` on disk.
Importing this module creates the ``SOURCE`` directory as a side effect.
"""
import os
import datetime
# Snapshot of "now" taken once at import time; used to date the Mongo collection.
date = datetime.datetime.now()
BOT_NAME = 'tianyancha'

# Log level
LOG_LEVEL = 'WARNING'

# Depth-first crawling (LIFO request queue in Redis)
SCHEDULER_QUEUE_CLASS = 'scrapy_redis.queue.LifoQueue'

SPIDER_MODULES = ['TianYanCha_MedicalCompany.spiders']
NEWSPIDER_MODULE = 'TianYanCha_MedicalCompany.spiders'

# Do not obey robots.txt
ROBOTSTXT_OBEY = False

# Disable Scrapy's built-in cookie middleware.
# NOTE: if this stays enabled, the project's custom cookie handling has no effect.
COOKIES_ENABLED = False

# Concurrency — too high and the site bans the account outright
CONCURRENT_REQUESTS = 2

# Accept responses with declared-length mismatch instead of erroring
# (original comment said "retry invalid responses", but False here means
# truncated/data-loss responses are passed through as valid, not retried)
DOWNLOAD_FAIL_ON_DATALOSS = False

# MONGO_URI = 'mongodb://192.168.0.100:27017'
# MONGO_URI = 'mongodb://127.0.0.1:27017'
MONGO_URI = 'mongodb://192.168.0.213:27017'
MONGO_DB = "CompanyData"
# Collection is dated with today's (non-zero-padded) year.month.day
MONGO_COLL = "TianYanCha_MedicalCompany--{}.{}.{}".format(date.year, date.month, date.day)
# MONGO_AUTHENTICATE = {'name': 'root', 'password': '???', 'source': '???'}

# Use scrapy-redis's dedup filter instead of Scrapy's default
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
# Use scrapy-redis's scheduler instead of Scrapy's default
SCHEDULER = "scrapy_redis.scheduler.Scheduler"
# Allow pause/resume: request records persist in Redis across restarts
SCHEDULER_PERSIST = True
REDIS_HOST = '127.0.0.1'
# REDIS_HOST = '192.168.0.100'
REDIS_PORT = 6379
REDIS_PARAMS = {'password': 'jianshu2018'}

# Directory where raw page source is saved
# SOURCE = r'Z:\pyz\Data\China\TianYanCha_MedicalCompany_{}.{}.{}'.format(date.year, date.month, date.day)
# NOTE(review): the date here is pinned to 2020.7.23 while MONGO_COLL above uses
# today's date — confirm the pin is intentional (e.g. resuming an earlier crawl).
SOURCE = r'Z:\pyz\Data\China\TianYanCha_MedicalCompany_2020.7.23'
# SOURCE = r'Z:\pyz\Data\China\test'
# Create the directory (import-time side effect; no-op if it already exists)
os.makedirs(SOURCE, exist_ok=True)

DOWNLOADER_MIDDLEWARES = {
        'TianYanCha_MedicalCompany.middlewares.UserAgentmiddleware': 600,           # random-headers (User-Agent) middleware
}

ITEM_PIPELINES = {
   'TianYanCha_MedicalCompany.pipelines.TianyanchaMedicalcompanyPipeline': 300,
}
